This file is a supplementary data attached with the publication concerning the genetic determinism of Durum Wheat to the Fusarium head blight. It aims to describe the genetic map provided in this paper. The script allowing to build the genetic map is in the folder 1_Partie_Bioinfo.
Let’s upload this genetic map:
#Watch out, to reproduct analysis, you have to update the path.
map=read.table("/Users/yan/Dropbox/Publi_Fusariose/ANALYSIS_REPRO/DATA/map_avec_posi_physique.txt" , header=T )
# number of markers
nmark=nrow(map)This genetic map is composed of 14316 markers. Putative physical positions of markers are available.
Charge some libraries that will be useful
library(RColorBrewer)
library(xtable)
library(tidyverse)
library(rmdformats)
library(plotly)
library(knitr)Basic statistics are computed for every chromosomes of the genetic map, then for the A and B genomes, and finally for the whole genetic map. Results are presented in the table below:
#Let's create a function that calculate some basic statistics for a piece of map
my_fun=function(my_map){
num=nrow(bilan)
num=num+1
bilan[num,1]=i
bilan[num,2]=nrow(my_map)
bilan[num,3]=max(my_map[,3])
gaps= sort(my_map[,3])[-1] - sort(my_map[,3])[-length(my_map[,3])]
bilan[num,4]=mean(gaps)
bilan[num,5]=max(gaps)
bilan[num,6]=round(nrow(unique(my_map[,c(1,3)])),0)
return(bilan)
}
#Let's apply this function on our map, chromosome by chromosome and for the whole map:
# summary table that we are going to fill
bilan=data.frame(matrix(0,0,6)) ; num=0
colnames(bilan)=c("Chromo","nbr_marker","size_in_cM","average_gap","biggest_gap","nb_uniq_pos")
# apply the function to every chromosome
for(i in levels(map$group)){
map_K=map[map$group==i,]
bilan=my_fun(map_K)
}
# then to A and B genomes
for(i in c("A" , "B")){
map_K=map[substr(map$group , 2 , 2)==i , ]
bilan=my_fun(map_K)
}
# then to the whole map
i="tot"
bilan=my_fun(map)#print(xtable(bilan), type = "html", include.rownames = F , comment=FALSE)
kable(xtable(bilan))| Chromo | nbr_marker | size_in_cM | average_gap | biggest_gap | nb_uniq_pos |
|---|---|---|---|---|---|
| 1A | 721 | 175.4 | 0.2436111 | 15.2 | 122 |
| 1B | 1227 | 192.9 | 0.1573409 | 9.2 | 154 |
| 2A | 1195 | 222.3 | 0.1861809 | 12.4 | 165 |
| 2B | 1423 | 243.8 | 0.1714487 | 12.2 | 190 |
| 3A | 733 | 212.2 | 0.2898907 | 7.0 | 137 |
| 3B | 1221 | 241.0 | 0.1975410 | 14.5 | 187 |
| 4A | 969 | 224.3 | 0.2317149 | 13.0 | 151 |
| 4B | 1033 | 162.3 | 0.1572674 | 16.7 | 124 |
| 5A | 787 | 303.5 | 0.3861323 | 10.2 | 190 |
| 5B | 1180 | 259.2 | 0.2198473 | 9.3 | 182 |
| 6A | 803 | 190.6 | 0.2376559 | 6.5 | 136 |
| 6B | 1101 | 185.5 | 0.1686364 | 12.6 | 153 |
| 7A | 1187 | 239.4 | 0.2018550 | 9.5 | 177 |
| 7B | 736 | 214.5 | 0.2918367 | 8.5 | 130 |
| A | 6395 | 303.5 | 0.0474664 | 9.2 | 1078 |
| B | 7921 | 259.2 | 0.0327273 | 9.3 | 1120 |
| tot | 14316 | 303.5 | 0.0212015 | 9.2 | 2198 |
This table is saved as a supplementary material for the publication.
write.table(bilan, "../../../SUPPORTING_DATA/OR_map_feature.csv", sep=";", quote=F, row.names = F)ggplot(map, aes(y=position, x=group, color=ref)) +
geom_point() +
scale_y_reverse() For most of the SNPs, a physical position is available trough the Ensembl database. It is interesting to compare the physical and the genetic positions of markers when both informations are available.
This is the “marey map” representation. We will produce a figure for the paper with this code.
p=map %>%
filter(group==group_phy) %>%
ggplot(aes(x=position_phy, y=position, color=ref, text=marker)) +
geom_point(size=0.5, alpha=0.5) +
facet_wrap(~group, scales="free") +
theme(legend.position="none", axis.text=element_blank() , axis.ticks= element_blank()
)
ggplotly(p)Save this figure as a supplementary material
png("../../../SUPPORTING_DATA/OR_marey_map.png")
p
dev.off()## quartz_off_screen
## 2
Let’s calculate the spearman correlation between physical and genetic positions for each chromosome?
tb=map %>%
filter(group==group_phy & !is.na(group)) %>%
group_by(group) %>%
summarize(
cor_spearman=cor(position, position_phy, use="complete.obs", method="spearman") %>% round(2),
nvalue=length(position)
)
kable(xtable(tb))| group | cor_spearman | nvalue |
|---|---|---|
| 1A | 1.00 | 644 |
| 1B | 1.00 | 1099 |
| 2A | 0.99 | 1074 |
| 2B | 1.00 | 1312 |
| 3A | 1.00 | 685 |
| 3B | 1.00 | 1103 |
| 4A | 0.99 | 883 |
| 4B | 1.00 | 963 |
| 5A | 0.99 | 705 |
| 5B | 1.00 | 1083 |
| 6A | 1.00 | 739 |
| 6B | 1.00 | 969 |
| 7A | 1.00 | 1076 |
| 7B | 1.00 | 691 |
Do we have markers that have different chromosome attributions? Apparently we have some:
- between 7A and 4A
adjacency=table(map$group, map$group_phy)
kable(xtable(adjacency))| 1A | 1B | 2A | 2B | 3A | 3B | 4A | 4B | 5A | 5B | 6A | 6B | 7A | 7B | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1A | 644 | 30 | 0 | 1 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 3 |
| 1B | 57 | 1099 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 2A | 0 | 0 | 1074 | 71 | 1 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 |
| 2B | 0 | 0 | 59 | 1312 | 0 | 3 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
| 3A | 1 | 0 | 0 | 0 | 685 | 28 | 0 | 6 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3B | 0 | 1 | 0 | 0 | 50 | 1103 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
| 4A | 0 | 0 | 0 | 0 | 1 | 0 | 883 | 36 | 0 | 0 | 1 | 0 | 8 | 1 |
| 4B | 0 | 0 | 0 | 0 | 0 | 0 | 38 | 963 | 8 | 0 | 0 | 0 | 1 | 0 |
| 5A | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 6 | 705 | 49 | 0 | 0 | 0 | 0 |
| 5B | 0 | 1 | 0 | 0 | 0 | 0 | 7 | 0 | 58 | 1083 | 0 | 8 | 0 | 0 |
| 6A | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 739 | 35 | 2 | 0 |
| 6B | 0 | 2 | 0 | 0 | 0 | 1 | 1 | 1 | 0 | 1 | 49 | 969 | 0 | 0 |
| 7A | 0 | 1 | 0 | 0 | 0 | 0 | 32 | 1 | 0 | 0 | 0 | 2 | 1076 | 34 |
| 7B | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 28 | 691 |
diag(adjacency)=0
p=adjacency %>%
as.data.frame() %>%
rename(GeneticPosition=Var1, PhysicalPosition=Var2, NumberOfMarkers=Freq) %>%
ggplot( aes(x=GeneticPosition, y=PhysicalPosition, z=NumberOfMarkers, fill=NumberOfMarkers)) +
geom_tile() +
theme_bw()
ggplotly(p)